"""Score a Chinese article's relevance to a keyword list via TF-IDF cosine similarity."""
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

keywords = ["人工智能", "机器学习", "神经网络"]
article = "近年来，人工智能技术在各个领域迅速发展，特别是机器学习和深度学习模型。"

# Chinese word segmentation: split the text with jieba, then rejoin the words
# with spaces so TfidfVectorizer can tokenize on whitespace.


def tokenize(text):
    return " ".join(jieba.lcut(text))


corpus = [" ".join(keywords), tokenize(article)]
print(f"分词结果：{corpus}")
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus)

# Cosine similarity between the keyword vector (row 0) and the article vector (row 1)
similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
print(f"TF-IDF similarity: {similarity[0, 0]:.4f}")
